from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras import layers

# Load the MNIST split as (images, labels) pairs for training and testing.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
num_channels = 1   # size of the channel axis appended by the reshape below
num_classes = 10   # width of the one-hot labels; equals the softmax layer size

num_train_samples = X_train.shape[0]
num_test_samples = X_test.shape[0]
image_height = X_train.shape[1]
image_width = X_train.shape[2]

# Append an explicit channel axis (the Conv2D input_shape is
# height x width x channels) and convert to float32.  The reshape uses
# num_channels rather than a literal so it cannot drift from the model's
# input_shape, which is built from the same constant.
X_train = X_train.reshape(num_train_samples, image_height,
                          image_width, num_channels).astype('float32')
X_test = X_test.reshape(num_test_samples, image_height,
                        image_width, num_channels).astype('float32')

X_train = X_train / 255   # values [0..1] improve results
X_test = X_test / 255

# One-hot encode the labels.  Passing num_classes explicitly pins the
# encoding width instead of letting it be inferred from the largest label
# present in each array, so both splits always match the 10-way output layer.
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Convolutional classifier: three 3x3 ReLU convolutions (16, 32, 64 filters)
# with 2x2 max-pooling after the first two, then a flattened 32-unit ReLU
# dense layer and a 10-way softmax output.
model = Sequential([
    layers.Conv2D(16, (3, 3), activation='relu',
                  input_shape=(image_height, image_width, num_channels)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# summary() writes the layer table itself and returns None; wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

# Number of full passes over the training data.
EPOCHS = 20

# Configure the optimizer, loss and reported metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit silently (verbose=0); the test split is passed as validation data, and
# the returned History object is kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=EPOCHS,
    verbose=0,
    validation_data=(X_test, y_test),
)
